1 JP Morgan Visualisation Grade

Rubric grade
6-12 word descriptive title is left-justified in upper left corner 1
Subtitle and/or annotations provide additional information 2
Text size is hierarchical and readable 2
Text is horizontal 2
Data are labeled directly 1
Labels are used sparingly 2
Proportions are accurate 2
Data are intentionally ordered 1
Axis intervals are equidistant 2
Graph is two-dimensional 2
Display is free from decoration 2
Color scheme is intentional 0
Color is used to highlight key patterns 0
Color is legible when printed in black and white 0
Color is legible for people with colorblindness 0
Text sufficiently contrasts background 2
Gridlines, if present, are muted 1
Graph does not have border line 2
Axes do not have unnecessary tick marks or axis lines 2
Graph has one horizontal and one vertical axis 2
Graph highlights significant finding or conclusion 2
The type of graph is appropriate for data 2
Graph has appropriate level of precision 2
Individual chart elements work together to reinforce the overarching takeaway message 2

2 Worst Possible Visualisation

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

stop_search_2021_worst <- readr::read_csv(here::here("data","stop-search","2021-09","2021-09-metropolitan-stop-and-search.csv"))%>%
  janitor::clean_names()
## Rows: 14319 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): Type, Gender, Age range, Self-defined ethnicity, Officer-defined e...
## dbl  (2): Latitude, Longitude
## lgl  (4): Part of a policing operation, Policing operation, Outcome linked t...
## dttm (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
stop_search_2021_worst%>%
  ggplot()+
  geom_bar(aes(x = officer_defined_ethnicity,
               fill = type))+
  labs(title = "Number of Stop and Searches by Type and Ethnicity",
       y = "",
       x = "Ethnicity")

3 MET Police

library(readxl)
library(dplyr)
library(stringr)
# load 2021 September data
stop_search_2021 <- readr::read_csv(here::here("data","stop-search","2021-09","2021-09-metropolitan-stop-and-search.csv"))
## Rows: 14319 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): Type, Gender, Age range, Self-defined ethnicity, Officer-defined e...
## dbl  (2): Latitude, Longitude
## lgl  (4): Part of a policing operation, Policing operation, Outcome linked t...
## dttm (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ward_population <- read_excel(path = here::here("data","/London-wards-2018_ESRI/CT0225_2011 Census - Age by ethnic group (based on CT0010) by sex - London HT wards.xlsx"),
                              sheet = "CT0225 - All usual residents",
                              skip = 11,
                              col_names = T,
                              range = "A11:VA674")%>%
  janitor::clean_names()
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
for(i in 4:573){
  if(!is.na(ward_population[1,i])){
    temp <- ward_population[1,i]
  }
  else{
    ward_population[1,i] <- temp
  }
}

names(ward_population)[4:length(ward_population)] <- paste0(ward_population[1,],"_",ward_population[2,])[4:length(ward_population)]

ward_population <- ward_population%>%
  janitor::clean_names()

ward_population <- ward_population[-c(1,2),]

ward_population <- ward_population%>%
  # rename(area_code = x1,
  #        area_name = x2,
  #        total_population = x3)%>%
  mutate(area_code = case_when(!is.na(x1) ~ str_split_fixed(ward_population$x1," ",2)[,1],
                               TRUE ~ str_split_fixed(ward_population$x2," ",2)[,1]),
         .after = x1,
         area_name = case_when(!is.na(x1) ~ str_split_fixed(ward_population$x1," ",2)[,2],
                               TRUE ~ str_split_fixed(ward_population$x2," ",2)[,2]),
         population_total = x3)

ward_population <-  subset(ward_population, select = -c(x1,x2,x3))


indx_black <- grepl('black', colnames(ward_population))

black_pop_total<-rowSums(data.frame(lapply(ward_population[which(indx_black)], as.numeric)))

ward_population_no_age <- ward_population%>%
  mutate(black_population = black_pop_total,
         population_total = as.numeric(population_total))%>%
  select(area_code,
         area_name,
         population_total,
         black_population)%>%
  mutate(prc_black = black_population/population_total)
skimr::skim(stop_search_2021)
Data summary
Name stop_search_2021
Number of rows 14319
Number of columns 15
_______________________
Column type frequency:
character 8
logical 4
numeric 2
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Type 0 1.00 13 25 0 3 0
Gender 177 0.99 4 6 0 3 0
Age range 2035 0.86 5 8 0 5 0
Self-defined ethnicity 171 0.99 13 84 0 17 0
Officer-defined ethnicity 316 0.98 5 5 0 4 0
Legislation 0 1.00 30 55 0 4 0
Object of search 44 1.00 8 35 0 8 0
Outcome 0 1.00 6 31 0 6 0

Variable type: logical

skim_variable n_missing complete_rate mean count
Part of a policing operation 0 1 0 FAL: 14319
Policing operation 14319 0 NaN :
Outcome linked to object of search 14319 0 NaN :
Removal of more than just outer clothing 14319 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Latitude 1582 0.89 51.50 0.06 51.2 51.46 51.51 51.55 51.69 ▁▁▅▇▁
Longitude 1582 0.89 -0.11 0.14 -1.5 -0.19 -0.10 -0.02 0.27 ▁▁▁▇▆

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
Date 0 1 2021-08-31 23:00:00 2021-09-30 22:59:00 2021-09-16 09:44:00 8643
library(leaflet)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.3, PROJ 7.2.1
library(ggplot2)
library(dplyr)
library(leaflet.extras)


# read in the shapefile, transform it into long lat format
wards <- st_read(here::here("data/London-wards-2018_ESRI/London_Ward_CityMerged.shp"))
## Reading layer `London_Ward_CityMerged' from data source 
##   `/Users/kazmernagy-betegh/Library/Mobile Documents/com~apple~CloudDocs/LBS/AM10_Data Visualisation and Storytelling/am10.mam2022/data/London-wards-2018_ESRI/London_Ward_CityMerged.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 633 features and 6 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## Projected CRS: OSGB 1936 / British National Grid
wards <- st_transform(wards,crs=4326)

# transform points to sf
stops_sf <- st_as_sf(stop_search_2021%>%select(Longitude, Latitude)%>%na.omit,coords = c('Longitude',"Latitude"), crs = st_crs(wards))

# intersection of polygons and points
stop_locations <- stops_sf %>% 
  mutate(intersection = as.integer(st_intersects(geometry, wards$geometry)),
         area = if_else(is.na(intersection), '', wards$NAME[intersection])) 

# split geometry in coordinates
stop_locations <- stop_locations%>%
  mutate(X= st_coordinates(geometry)[,1],
         Y= st_coordinates(geometry)[,2])

# join areas to stop search
stop_search_2021 <- left_join(stop_search_2021, stop_locations, by = c("Longitude" = "X", "Latitude" = "Y" ))

stop_search_2021_wards <- left_join(stop_search_2021, wards, by = c("area"= "NAME"))

stop_search_2021_wards <- stop_search_2021_wards%>%
  rename(point_geometry = geometry.x,
         geometry = geometry.y)

# stop_search_2021_wards <- stop_search_2021_wards%>%select(-c("geometry"))


stop_search_2021_wards_pop <- left_join(stop_search_2021_wards,ward_population_no_age, by = c("area" = "area_name"))

stop_search_2021_wards_pop <- stop_search_2021_wards_pop%>%
  janitor::clean_names()

# stop_search_2021_wards <- st_transform(stop_search_2021_wards,crs=4326)

prc_balck_stops_per_area <- stop_search_2021_wards_pop%>%
  filter(!is.na(area), area != "", !is.na(officer_defined_ethnicity))%>%
  group_by(area, officer_defined_ethnicity)%>%
  summarise(ethnic_stops = n())%>%
  mutate(prc_ethnic_stops = ethnic_stops/sum(ethnic_stops))%>%
  filter(officer_defined_ethnicity == "Black")
## `summarise()` has grouped output by 'area'. You can override using the `.groups` argument.
prc_balck_stops_per_area <- merge(prc_balck_stops_per_area, data.frame(wards$NAME), by.x = "area", by.y = "wards.NAME", all.y = T)

pal <-  colorNumeric("OrRd", stop_locations$intersection)


map_london <- leaflet()%>%
  addTiles(
    options = tileOptions(minZoom = 10, maxZoom = 15)
    )%>%
  addControl("London Stop and Search Frequency", position = 'bottomleft')%>%
  setMaxBounds(lng1 = -0.147949,
               lng2 = -0.117949,
               lat1 = 51.20775,
               lat2 = 51.70775)%>%
  addPolygons(data = wards,
              color = 'blue',
              fillOpacity = 0.05,
              weight = 0.5,
              fill = ,
              popup = ~paste0(NAME," num. stops: ",stop_locations$intersection[stop_locations$area == NAME],
                              "; ","Black Population: ",round(stop_search_2021_wards_pop$prc_black[which(stop_search_2021_wards_pop$area == NAME)]*100,2),"%",
                              "; "))%>%
  addHeatmap(group = "heat",
             data = stop_locations%>%na.omit,
             lng = ~as.numeric(stop_locations$X),
             lat = ~as.numeric(stop_locations$Y),
             intensity = stop_locations$intersection,
             radius = 6,
             minOpacity = 0.08,
             max = 0.7,
             gradient = "OrRd")%>%
  addLegend(values = stop_locations$intersection%>%na.omit,
            group = "heat",
            pal =  colorNumeric("OrRd",stop_locations$intersection),
            title = "Number of Stop and Searches")
## Warning in stop_locations$area == NAME: longer object length is not a multiple
## of shorter object length
## Warning in stop_search_2021_wards_pop$area == NAME: longer object length is not
## a multiple of shorter object length
map_london
library(tidyr)

london_ethnic_dist <- data.frame(as.factor(c("White", "Black", "Asian", "Other")),
                                 c(59.8,18.4,13.3, 8.4))

colnames(london_ethnic_dist) <- c("ethnicity", "prc")
  
plot1 <- stop_search_2021%>%
  janitor::clean_names()%>%
  filter(!is.na(officer_defined_ethnicity), !is.na(self_defined_ethnicity))%>%
  group_by(officer_defined_ethnicity)%>%
  summarise(num_stops = n())%>%
  mutate(prc_stops = round(num_stops/sum(num_stops)*100,2))%>%
  mutate(prc = c(18.4,13.3, 8.4, 59.8))%>%
  pivot_longer(cols = 3:4, names_to = "type", names_repair = "unique", values_to = "prc")%>%
  ggplot()+
  geom_col(aes(y = reorder(officer_defined_ethnicity, prc),
               x = prc,
               fill = type),
           position = "dodge")+
  geom_text(aes(y = reorder(officer_defined_ethnicity,prc),  
                x = prc, 
                label = paste0(prc,"%"),
                group = type),
            position = position_dodge(width = 1),
            fontface = 2)+
  theme_minimal()+
  theme(panel.grid.major = element_blank(),
        plot.caption.position = "plot",
        plot.caption = element_text(vjust = 2, hjust = 0))+
  labs(title = "40% of Stop and Searches conducted on 13% of Londons population",
       y = "",
       x = "% of Stop and Search Conducted in 2021 September",
       caption = "NOTE: Ethnicity Breakdown of London from Wikipedia")+
  scale_fill_manual(values=c("skyblue", "tomato"), 
                       name="% distribution",
                       
                       labels=c("Ethnic Distribution of London", "Stop and Search Ethnic Distribution"))
  

plot1

plot2 <- stop_search_2021%>%
  janitor::clean_names()%>%
  filter(!is.na(officer_defined_ethnicity), !is.na(self_defined_ethnicity))%>%
  mutate(self_id = case_when(grepl("Black",self_defined_ethnicity)~"Black",
                             grepl("White",self_defined_ethnicity)~"White",
                             grepl("Asian",self_defined_ethnicity)~"Asian",
                             TRUE ~ "Other"))%>%
  pivot_longer(cols = c(self_id, officer_defined_ethnicity), names_to = "classificaiton_type", values_to = "ethnicity")%>%
  group_by(classificaiton_type, ethnicity)%>%
  summarise(num_stops = n())%>%
  mutate(prc_stops = round(num_stops/sum(num_stops)*100,2))%>%
  ggplot()+
  geom_col(aes(y = reorder(ethnicity, prc_stops),
               x = prc_stops,
               fill = classificaiton_type),
           position = "dodge")+
  geom_text(aes(y = reorder(ethnicity,prc_stops),  
                x = prc_stops, 
                label = paste0(prc_stops,"%"),
                group = classificaiton_type),
            position = position_dodge(width = 1),
            fontface = 2)+
  theme_minimal()+
  theme(panel.grid.major = element_blank(),
        plot.caption.position = "plot",
        plot.caption = element_text(vjust = 2, hjust = 0))+
  labs(title = "Only 63% of People Identified as Black by Officers Self Identify as that",
       y = "",
       x = "% of Stop and Search Conducted in 2021 September")+
  scale_fill_manual(values=c( "tomato", "skyblue"), 
                       name="",
                       
                       labels=c("Self Defined Ethnicity", "Officer Defined Ethicity"))
## `summarise()` has grouped output by 'classificaiton_type'. You can override using the `.groups` argument.
plot2